{R script}

\begin{scriptsize}
\begin{lstlisting}
library(openxlsx)
library(detectRUNS)
library(tidyr)
library(ggplot2)
library(dplyr)
library(reshape2)
library(Rcpp)
library(gridExtra)
library(data.table)
library(plyr)
library(tidyverse)

slidingRuns_hom <- slidingRUNS.run(
genotypeFile = 'plink.ped', 
mapFile = 'plink.map', 
windowSize = 20,
minSNP = 20,
maxOppWindow = 1,
maxMissWindow = 1,
maxGap = 250000, #bps
minLengthBps = 1000000,#bps
minDensity = 1/70, # SNP/kbps
threshold = 0.05,
ROHet = FALSE, 
maxOppRun = NULL,
maxMissRun = NULL
) 
summaryList <- summaryRuns(
runs = slidingRuns_hom, mapFile = 'plink.map',
genotypeFile = 'plink.ped', 
Class = 2, snpInRuns = TRUE)

slidingRuns_hom %>%
dplyr::summarise(N =length(lengthBps),
media=mean(lengthBps/(summaryList$result_Froh_genome_wide$sum/
summaryList$result_Froh_genome_wide$Froh_genome)[1])*100,
desvio=sd(lengthBps/(summaryList$result_Froh_genome_wide$sum/
summaryList$result_Froh_genome_wide$Froh_genome)[1])*100,
cv = desvio/media)


slidingRuns_hom %>%
dplyr::group_by(group) %>%
dplyr::summarise(N =length(lengthBps),
media=mean(lengthBps/(summaryList$result_Froh_genome_wide$sum/
summaryList$result_Froh_genome_wide$Froh_genome)[1])*100,
desvio=sd(lengthBps/
(summaryList$result_Froh_genome_wide$sum/
summaryList$result_Froh_genome_wide$Froh_genome)[1])*100,
cv = desvio/media)

tab1 <- slidingRuns_hom %>%
dplyr::group_by(group) %>%
dplyr::summarise(N =length(lengthBps),
valor_length = gsub(".",",",
x=paste0(round(mean(lengthBps/10^6),2)," + ",
round(sd(lengthBps/10^6),2)),fixed = T),
minimo_length=round(min(lengthBps/10^6),2),
maximo_length=round(max(lengthBps/10^6),2))
tab2 <- slidingRuns_hom %>%
dplyr::group_by(group,id) %>%
dplyr::summarise(lengthBps =length(lengthBps)) %>%
dplyr::group_by(group) %>%
dplyr::summarise(valor_number = gsub(".",",",
x=paste0(round(mean(lengthBps),2)," + ",
round(sd(lengthBps),2)),fixed = T),
minimo_number=round(min(lengthBps),2),
maximo_number=round(max(lengthBps),2))
tab_final <- tab1 %>%
left_join(tab2)
getwd()
write.xlsx(tab_final,"estatistica_roh.xlsx")
#### ILHAS DE HOMOZIGOSIDADE, PERCENTIS
x <- summaryList$SNPinRun %>%
as.data.table()
x <- x[,top99 :=quantile(COUNT,.99), by=BREED] 
x_select <- x %>%
filter(top99<COUNT)
x <- x %>%
dplyr::group_by(CHR,BREED) %>%
dplyr::summarise(x=sum(top99<COUNT)) %>%
filter(x>30) %>%
spread(key="BREED",value="x",fill = 0)
write.xlsx(x,"top_30.xlsx")
write.xlsx(x_select,"homozigose_99.xlsx")
library(base)
## calculating the average runs of homozygosity per chromosome by breed
summaryList$summary_ROH_mean_chr
media_RoH_cromo <-slidingRuns_hom %>%
dplyr::group_by(chrom=as.numeric(chrom),group) %>%
dplyr::summarise(N=length(id)) 
ggplot(data=media_RoH_cromo) +
geom_bar(aes(x=chrom,fill=group,y=N),
stat = "identity",position = "dodge")
## Calculating the average number of runs per chromosome 
summaryList$summary_ROH_mean_chr
media_RoH_cromo <-slidingRuns_hom %>%
dplyr::group_by(chrom=as.numeric(chrom)) %>%
dplyr::summarise(N=length(id)) 
media_RoH_cromo$prop <- media_RoH_cromo$N/sum(media_RoH_cromo$N)
ylim.prim <- range(media_RoH_cromo$N)
ylim.sec <- range(media_RoH_cromo$prop)
b <- diff(ylim.prim)/diff(ylim.sec)
a <- ylim.prim[1] - b*ylim.sec[1]

tiff("figuras/Figure 2.tiff",width=25,height=15,res=300,units="cm")
ggplot(data=media_RoH_cromo) +
geom_bar(aes(x=chrom,y=N,
fill=chrom),stat = "identity",position = "dodge") +
geom_line(aes(x=chrom,y=N),col="red",size=1, show.legend = FALSE) +
xlab("Chromosome") +
ylab("Number of RoH")+
scale_x_continuous(
breaks = 1:29)+
scale_color_gradient2(midpoint=15, low="lightblue", mid="blue",
high="darkblue" )+
theme(legend.position = "none")+
scale_y_continuous(sec.axis = sec_axis(~ (. - a)/b, 
name = "Percentage %",
labels = scales::label_percent()))
dev.off()

## Statistics - NUMBER AND LENGHT ROH FOR BREED
library(dplyr)
library(plyr)
detach(package:plyr)
number=slidingRuns_hom %>% group_by(group,id) %>% tally() %>%
select(-one_of(setdiff(names(slidingRuns_hom), "group"))) %>% 
summarise(mean = mean(n), sd = sd(n), min = min(n), max = max(n))
write.table(number,'number.txt')
length=slidingRuns_hom %>% group_by(group) %>%
summarise(mean = mean(lengthBps), sd = sd(lengthBps), 
min = min(lengthBps), max = max(lengthBps))
write.table(length,'length.txt')

summaryList$summary_ROH_count_chr
summaryList$result_Froh_chromosome_wide
res <- summaryList$summary_ROH_count
write.table(summaryList$summary_ROH_count,'resumo_F_ROH.txt1')
res$cat <- rownames(res)
res <-  res %>% gather(key = "raca",value = "ncorrida",-cat)

# Mean of FROH by race
froh <- Froh_inbreeding(runs = slidingRuns_hom, mapFile = 'plink.map')
write.table(froh,'froh.txt')
inbreeding_statistics <- froh %>%
dplyr::group_by(group) %>%
dplyr::summarise(N = length(Froh_genome),
mean=paste0(round(mean(Froh_genome)*100,2),"%"),
std=paste0(round(sd(Froh_genome)*100,2),"%"),
cv = paste0(round(sd(Froh_genome)/
mean(Froh_genome)*100,2),"%"),
percent_ind_inf_10percent = 
paste0(round(mean(Froh_genome<0.10)*100,2),"%"),
percent_ind_inf_15percent = 
paste0(round(mean(Froh_genome<0.15)*100,2),"%")) 
write.xlsx(inbreeding_statistics,"inbreeding_statistics_homo.xlsx")


dados_grafico1 <- summaryList$summary_ROH_count_chr
dados_grafico2 <- summaryList$summary_ROH_percentage_chr
dados_grafico1$chr <- rownames(dados_grafico1)
dados_grafico <- dados_grafico1 %>%
left_join(dados_grafico2,by=c("chr"="chrom"))
dados_grafico_t  <- dados_grafico %>%
gather(key = "variavel",value="valor",-chr) %>%
mutate(raca=gsub(pattern= "(.*)\\.\\w",replacement="\\1",variavel),
tipo=gsub(pattern="(.*)\\.(\\w)",replacement="\\2",variavel),
chr = as.numeric(chr)) %>%
select(-variavel) %>%
spread(key="tipo",value="valor")
scaleFactor <-dados_grafico_t %>%
dplyr::group_by(raca) %>%
dplyr::summarise(mx = max(x) ,
my = max(y)) %>%
mutate(scl = mx/my)
a <-summaryList$result_Froh_genome_wide
table(a$sum/a$Froh_genome)
2464792553 
dados_grafico_t$tamanho <- dados_grafico_t$x/dados_grafico_t$y

# write.table (summaryList,'resumo geral.txt')
summaryList$result_Froh_genome_wide
res <- summaryList$summary_ROH_count
write.table (summaryList$summary_ROH_count,'resumo_F_ROH.txt1')
res$cat <- rownames(res)
res <-  res %>% gather(key = "raca",value = "ncorrida",-cat)
# write.table (summaryList$summary_ROH_mean_chr,'resumo_F_ROH.txt')
# # res$cat <- rownames(res)
# res <-  res %>% gather(key = "raca",value = "ncorrida",-chrom)

a <- summaryList$summary_ROH_mean_chr
a <- a %>% gather(-chrom,key="raca",value="media")
library(ggplot2) 
gplot(a) +
geom_density(aes(x=media,group=raca,col=raca))

library(tidyverse)
tiff("Figure3_Class.tiff",width=20,height=10,res=300,units="cm")
ggplot(data=res) + 
geom_bar(aes(x = fct_inorder(cat),weight =ncorrida,fill=raca),
position = "dodge") +
labs(fill="Breed")+
xlab("ROH length category")+
ylab("Frequency")
dev.off()

tiff("Figure 4a.tiff",width=25,height=15,res=300,units="cm")
Figure3<-plot_manhattanRuns(
runs =slidingRuns_hom[slidingRuns_hom$group=="Marota",], 
genotypeFile = 'plink.ped', mapFile = 'plink.map',
plotTitle = "MANHATHANN PLOT-% SNP in Runs",)
dev.off()
tiff("Figure 4b.tiff",width=25,height=15,res=300,units="cm")
Figure3<-plot_manhattanRuns(
runs =slidingRuns_hom[slidingRuns_hom$group=="Anglonubiana",], 
genotypeFile = 'plink.ped', mapFile = 'plink.map',
plotTitle = "MANHATHANN PLOT-% SNP in Runs",)
dev.off()

## boxplot
tiff("Figure 5 - BOXPLOT.tiff",width=25,height=15,res=300,units="cm")
ggplot(froh) +
geom_boxplot(aes(x=group,y=Froh_genome,fill=group))+
labs(fill="Breed")+
xlab("") + ylab("FROH genome") + 
scale_y_continuous(labels = scales::percent)
dev.off()
tiff("Figure 6.tiff",width=25,height=15,res=300,units="cm")
plot_InbreedingChr(runs = slidingRuns_hom, mapFile = 'plink.map', style='All',
savePlots = TRUE,outputName = "inbreeding")
dev.off()
tiff("Figure 7.tiff",width=25,height=15,res=300,units="cm")
plot_InbreedingChr(runs = slidingRuns_hom, mapFile = 'plink.map', style='All',
savePlots = TRUE,outputName = "inbreeding")
dev.off()

##savedRunFile<- system.file("extdata","diniz2023_goat-subset.sliding.csv",
package = "detectRUNS")
runs<-readExternalRuns(inputFile = saveddeRunFile,
program = "detectRuns")
head(runs)
# gerando manual
a11 <-  slidingRuns_hom[slidingRuns_hom$chrom==11,]
ggplot(a11) +
geom_line(aes(x=((from+to)/2)/1000000,y=nSNP,col=group))
tiff("Figure 7a (chromo 11).tiff",width=25,
height=15,res=300,units="cm")
plot_SnpsInRuns(
runs = slidingRuns_hom[slidingRuns_hom$chrom==11,],
genotypeFile = 'plink.ped', 
mapFile = 'plink.map')
dev.off()
tiff("Figure 7b (chromo 15).tiff",width=25,height=15,res=300,units="cm")
plotSnpsInRuns(
runs = slidingRuns_hom[slidingRuns_hom$chrom==15,],
genotypeFile = 'plink.ped', 
mapFile = 'plink.map')
dev.off()
## Identify top RUNS
topRuns <- tableRuns(
runs =  slidingRuns_hom, 
genotypeFile = 'plink.ped',
mapFile = 'plink.map', 
threshold = 0.50)
write.table (topRuns,'topRuns.txt')
res$cat <- rownames(res)
res <-  res %>% gather(key = "raca",
value = "ncorrida",-cat)
consecutiveRuns_het <- consecutiveRUNS.run(
genotypeFile ='plink.ped',
mapFile = 'plink.map',
minSNP = 10,
ROHet = TRUE,
maxGap = 10^6,
minLengthBps = 1000000,
maxOppRun = 5,
maxMissRun = 5
)
summaryList <- summaryRuns(
runs = consecutive_het, mapFile = 'plink.map',
genotypeFile = 'plink.ped', 
Class = 2, snpInRuns = TRUE)
tiff("Figure 18.tiff",width=25,height=15,res=300,units="cm")
plot_manhattanRuns(
runs = consecutiveRuns_het[consecutiveRuns_het$group=="Marota",], 
genotypeFile = 'plink.ped', mapFile = 'plink.map',
plotTitle = "Consecutive HETERO")
dev.off()

hist(froh$Froh_genome,main=NULL,
xlab="inbreeding coefficients of each individual sample")
dev.off()
summaryList <- summaryRuns(
runs = consecutiveRuns_het, mapFile = 'plink.map',
genotypeFile = 'plink.ped', 
Class = 2, snpInRuns = TRUE)
res <- summaryList$summary_ROH_count
res$cat <- rownames(res)
res <-  res %>% gather(key = "raca",value = "ncorrida",-cat)
tiff("Figure 19.tiff",width=25,height=15,res=300,units="cm")
ggplot(data=res) + 
geom_bar(aes(x = cat,weight =ncorrida,fill=raca),
position = "dodge") +
labs(fill="Breed")+
xlab("ROH length category")+
ylab("Frequency")
dev.off()
summaryList$summary_het_count
summaryList$summary_het_mean_chr
plot_Runs(runs = consecutiveRuns_het,
savePlots = TRUE,outputName = "x")
plot_SnpsInRuns(
runs = slidingRuns_het[slidingRuns_hom$chrom==7,], 
genotypeFile = 'plink.ped', 
mapFile = 'plink.map')
plot_manhattanRuns(
runs = slidingRuns_het[slidingRuns_het$group=="MarotaEMBR",], 
genotypeFile = 'plink.ped', 
mapFile = 'plink.map',plotTitle = "TITUTLO")
plot_InbreedingChr(runs = consecutiveRuns_het,
mapFile = 'plink.map', style='All',
savePlots = TRUE,outputName = "inbreeding")
topRuns <- tableRuns(
runs =  consecutiveRuns_het, genotypeFile = 'plink.ped',
mapFile = 'plink.map', 
threshold = 0.5)

##CONSECUTIVERUNS HETEROZIGOSE - used 17.278
consecutiveRuns_het <- consecutiveRUNS.run(
  genotypeFile ='plink.ped',
  mapFile = 'plink.map',
  minSNP = 10,
  ROHet = TRUE,
  maxGap = 10^6,
  minLengthBps = 1000000,
  maxOppRun = 5,
  maxMissRun = 5
)
summaryList <- summaryRuns(
  runs = consecutiveRuns_het, mapFile = 'plink.map', genotypeFile = 'plink.ped', 
  Class = 2, snpInRuns = TRUE)
consecutiveRuns_het <- consecutiveRuns_het %>% as.data.table()

## Identify top RUNS
topRuns <- tableRuns(
  runs =  consecutiveRuns_het, 
  genotypeFile = 'plink.ped',
  mapFile = 'plink.map', 
  threshold = 0.45)

  
## calculating the average runs of homozygosity per chromosome by breed
summaryList$summary_HET_mean_chr
media_HET_cromo <-consecutiveRuns_het %>%
  dplyr::group_by(chrom=as.numeric(chrom),group) %>%
  dplyr::summarise(N=length(id)) 

ggplot(data=media_HET_cromo) +
  geom_bar(aes(x=chrom,fill=group,y=N),stat = "identity",position = "dodge")


## Calculating the average number of runs per chromosome 
summaryList$summary_HET_mean_chr
media_HET_cromo <-consecutiveRuns_het %>%
  dplyr::group_by(chrom=as.numeric(chrom)) %>%
  dplyr::summarise(N=length(id)) 

media_HET_cromo$prop <- media_HET_cromo$N/sum(media_HET_cromo$N)

ylim.prim <- range(media_HET_cromo$N)
ylim.sec <- range(media_HET_cromo$prop)

b <- diff(ylim.prim)/diff(ylim.sec)
a <- ylim.prim[1] - b*ylim.sec[1]
number=consecutiveRuns_het %>% group_by(group,id) %>% tally() %>%
select(-one_of(setdiff(names(consecutiveRuns_het), "group"))) %>%
summarise(mean = mean(n), sd = sd(n), min = min(n), max = max(n))
write.table(number,'number.txt')
length=consecutiveRuns_het %>% group_by(group) %>%
 summarise(mean = mean(lengthBps), sd = sd(lengthBps),
 min = min(lengthBps), max = max(lengthBps))
write.table(length,'length.txt')


summaryList$summary_HET_count_chr
summaryList$result_Fhet_chromosome_wide
res <- summaryList$summary_HET_count
write.table(summaryList$summary_HET_count,'resumo_F_HET.txt1')
res$cat <- rownames(res)
res <-  res %>% gather(key = "raca",value = "ncorrida",-cat)

# Mean of FHET by race
fhet <- Fhet_inbreeding(runs = consecutiveRuns_het, mapFile = 'plink.map')
write.table(Fhet,'fhet.txt')
inbreeding_statistics <- fhet %>%
  dplyr::group_by(group) %>%
  dplyr::summarise( N = length(Fhet_genome),
mean=paste0(round(mean(Fhet_genome)*100,2),"%"),
std=paste0(round(sd(Fheth_genome)*100,2),"%"),
cv = paste0(round(sd(Froh_genome)/mean(Fhet_genome)*100,2),"%"),
percent_ind_inf_10percent = paste0(round(mean(Fhet_genome<0.10)*100,2),"%"),
percent_ind_inf_15percent = paste0(round(mean(Fhet_genome<0.15)*100,2),"%")) 

write.xlsx(inbreeding_statistics,"inbreeding_statistics_het.xlsx")

\end{lstlisting}
\end{scriptsize}

\end{apendicesenv}
% ---